%-------------------------------------------------------------------------%
% This program estimates firm values using the model and algorithm in 
% Sorkin (QJE 2018). 
%-------------------------------------------------------------------------%

clc;
clear all;
cd '\\employers\matlab\data_new';

%-------------------------------------------------------------------------%
% create looping variables for sample windows
%-------------------------------------------------------------------------%

%empty vector to store versions 
vervec = ones(35,1);

%declare first set of versions (these cover IAB pre-set windows and pooled sample)
vervec(1) = 3; 

%declare custom windows 
c = 2; %8 if including random splits
for w = 3:4:7
    for y = 1997:1:2019
        if y + (w-1)/2 <2019 && y - (w-1)/2 >1997   
            ver = y*10 + w;
            vervec(c) = ver;
            c = c+1;
        end
    end
end
clear c;

%-------------------------------------------------------------------------%
%loop over sample windows
%-------------------------------------------------------------------------%

allv = 1;


for i = allv:length(vervec) %loop starter is either allv or cusv
    
    %set version
    ver = vervec(i)

    fnamein1 = ['params_ver',num2str(ver),'.csv'];
    fnamein2 = ['connected_moves_ver',num2str(ver),'.csv'];
    fnameout1 = ['modelparameters_ver',num2str(ver),'.csv'];
    fnameout2 = ['exp_V_ver',num2str(ver),'.csv'];
    clear ver

    %-------------------------------------------------------------------------%
    % Read in data and parameters, create some variables 
    %-------------------------------------------------------------------------%

    %params = csvread('params.csv',1,0);
    params = csvread(fnamein1,1,0);
    %mover_dat = csvread('connected_moves.csv',1,0);
    mover_dat = csvread(fnamein2,1,0);

    %Mover data
    W = params(1);            % total employment in the strongly connected set
    endog_enrate = params(2); % observed value of pr of endog EN move (mean)
    endog_eerate = params(3); % observed value of pr of endog EE move (mean)
    pos_i    = 1;             % receiving establishment id position
    pos_j    = 2;             % sending establishment id position
    pos_eepr = 3;             % sending estab-year ee rate position
    pos_enpr = 4;             % sending estab-year en rate position
    pos_g    = 5;             % sending estab relative size position
    pos_f_o  = 6;             % sending estab relative offers from N position
    pos_bin  = 7;             % sending estab growth rate bin position
    pos_sect = 8;             % sending estab sector position

    %Collapse establishment data (g, f_o) over all moves
    [est_id,~,estindex]=unique(mover_dat(:,pos_j),'rows'); %index of unique senders
    N_j = max(estindex);                                   %number of unique senders
    est_dat_g = accumarray(estindex,mover_dat(:,pos_g),[],@mean);    %average g_i since g_i is constant for an estab
    est_dat_f = accumarray(estindex,mover_dat(:,pos_f_o),[],@mean);  %average f since f is constant for an estab
    est_sectr = accumarray(estindex,mover_dat(:,pos_sect),[],@mean); %average sector id since it's constant for an estab
    [sec_id,~,secindex] = unique(est_sectr); %index of unique sectors
    est_dat = [est_id,est_dat_g,est_dat_f];
    pos_g_edat   = 2;
    pos_f_o_edat = 3;

    %correlation tolerance
    corr_tol = 0.999; 

    %Row position of moves occuring at shrinking and growing estab-years
    shrink = sparse((mover_dat(:,pos_bin)<0));

    %-------------------------------------------------------------------------%
    % Estimation 
    %-------------------------------------------------------------------------%

    %Initialize outer while loop variables
    endog_eerate_new = endog_eerate;
    endog_enrate_new = endog_enrate;
    exp_V_i_0 = rand(N_j-1,1);
    exp_V_i_1 = rand(N_j-1,1);
    swexp_V_i_0 = est_dat_g(2:end).*exp_V_i_0;  %size wtd value 
    swexp_V_i_1 = est_dat_g(2:end).*exp_V_i_1;  %size wtd value
    corr_mat_1 = corrcoef(swexp_V_i_0,swexp_V_i_1);
    corr_1 = corr_mat_1(1,2);
    counter = 1;

    disp('Running outer while loop...');

    while (corr_1<corr_tol)

        %Terminate if exceeded 20 iterations
        if counter>20
            break
        end

        %Print loop index
        textcounter = ['Outer while loop iteration =',' ', num2str(counter)];
        disp(textcounter);

        %-------------------------------------------------------------------------%
        % Vector of Pr(X=1|EE=1) & Pr(X=1|EN=1)
        %-------------------------------------------------------------------------%
        p_x_ee = shrink.*max(0,(mover_dat(:,pos_eepr)-endog_eerate_new));
        p_x_en = shrink.*max(0,(mover_dat(:,pos_enpr)-endog_enrate_new));

        %-------------------------------------------------------------------------%
        % Vector of Pr(Endog=1|EE=1) and Pr(Endog=1|EN=1) 
        %-------------------------------------------------------------------------%
        p_endog_ee = shrink.*min((endog_eerate_new./mover_dat(:,pos_eepr)),1)+(1-shrink);
        p_endog_en = shrink.*min((endog_enrate_new./mover_dat(:,pos_enpr)),1)+(1-shrink);

        %-------------------------------------------------------------------------%
        % Vector of Pr(Exog=1|EE=1) and Pr(Exog=1|EN=1)
        %-------------------------------------------------------------------------%
        p_exog_ee = p_x_ee./mover_dat(:,pos_eepr);
        p_exog_en = p_x_en./mover_dat(:,pos_enpr);

        %-------------------------------------------------------------------------%
        % Step 1: Build M, and compute rho and delta. 
        %-------------------------------------------------------------------------%

        %unique pairs of sending and recieving establishments
        [pair,~,subs] = unique(mover_dat(:,pos_i:pos_j),'rows');
        rec_N = (pair(:,1)==2); %dummy for receiving "estab" = N     
        %Sum of probability weighted endogenous moves (non-zero elements of M)
        M_comp =  (1-rec_N).*(accumarray(subs,p_endog_ee))+...
                  rec_N.*(accumarray(subs,p_endog_en));
        %Sum of probability weighted job re-allocation shocks
        M_x_ee_comp = accumarray(subs,p_exog_ee);
        %Sum of probability weighted job destruction shocks 
        M_x_en_comp = accumarray(subs,p_exog_en);
        %Sum of raw moves
        move = ones(length(mover_dat),1);
        M_raw_comp = accumarray(subs,move);

        %Record (i,j) index for each pair (pair(:,1) is the receiver, pair(:,2) is
        %the sender
        est_index = (1:length(est_dat))';
        est_id = est_dat(:,1);
        recvr = pair(:,1);
        sendr = pair(:,2);
        pair_tab = table(recvr,sendr);
        est_tab  = table(est_index,est_id);
        recv_pos = join(pair_tab,est_tab,'LeftKeys',1,...
                        'RightKeys',2,'LeftVariables',2,'RightVariables',1); 
        send_pos = join(pair_tab,est_tab,'LeftKeys',2,...
                        'RightKeys',2,'LeftVariables',2,'RightVariables',1);            
        recv_index = table2array(recv_pos); %index is in column 2
        send_index = table2array(send_pos); %index is in column 2
        clear recv_pos send_pos;

        %Create M, rho and delta 
        M      = sparse(recv_index(:,2),send_index(:,2),M_comp,N_j,N_j);
        M_x_ee = sparse(recv_index(:,2),send_index(:,2),M_x_ee_comp,N_j,N_j);
        M_x_en = sparse(recv_index(:,2),send_index(:,2),M_x_en_comp,N_j,N_j);
        M_raw  = sparse(recv_index(:,2),send_index(:,2),M_raw_comp,N_j,N_j);

        %Sector-level rho and delta
        est_sectr_tab = table(est_sectr(2:end)); %sector id's

        rho = sum(M_x_ee)./sum(M_raw);
        rho = (full(rho))';             % establishment-level rho/1st element belongs to N and is NaN

        temp =  accumarray(est_sectr,rho,[],@mean); %average to sector level

       % sec_av_rho = table(sec_id(1:end-1),temp(1:end-1)); %sector id and sector avg rho     
       sec_av_rho = table(sec_id,temp); %sector id and sector avg rho 

        rho_sec_tab = join(est_sectr_tab,sec_av_rho,'LeftKeys',1,'RightKeys',1); %merge

        rho_sec = table2array(rho_sec_tab(:,2)); %sector level average rho for each estab
        rho_bar = mean(rho(2:end));     %overall average rho
        clear temp rho_sec_tab sec_av_rho;

        delta = sum(M_x_en)./sum(M_raw); 
        delta = (full(delta))';          % establishment-level delta/1st element belongs to N and is NaN

        temp = accumarray(est_sectr,delta,[],@mean); %average to sector level

        sec_av_delta = table(sec_id,temp); %sector id and sector avg delta    
        delta_sec_tab = join(est_sectr_tab,sec_av_delta,'LeftKeys',1,'RightKeys',1); %merge
        delta_sec = table2array(delta_sec_tab(:,2)); %sector level average delta for each estab    
        delta_bar = mean(delta(2:end)); %overall average delta
        clear temp delta_sec_tab sec_av_delta;

        clear M_x_ee M_x_en M_raw M_comp M_x_ee_comp M_x_en_comp M_raw_comp;

        %Create S 
        S_comp = sum(M);  %column sums of M
        s_pos  = 1:N_j;   %index of diagonal positions
        S = sparse(s_pos,s_pos,S_comp,N_j,N_j);
        clear S_comp;

        %-------------------------------------------------------------------------%
        % Step 2: Compute the fixed point of exp(V_tilde) = S^-1 * M * exp(V_tilde)
        %-------------------------------------------------------------------------%    
        exp_v_tilde_0 = rand(N_j,1);          %initial guess of exp(V_tilde)
        exp_v_tilde_1 = S\M*exp_v_tilde_0;    %initial evaluation of function

        diff_tol = 10e-9;
        its = 1;
        diff = sum(abs(exp_v_tilde_0 - exp_v_tilde_1));

        while (diff>diff_tol)  
            %Terminate if exceeded 500 iterations
            if its>500
                break
            end
            exp_v_tilde_0 = exp_v_tilde_1;
            exp_v_tilde_1 = S\M*exp_v_tilde_0;
            diff = sum(abs(exp_v_tilde_0 - exp_v_tilde_1)); 
            its = its+1;
        end

        %sign of exp_v_tilde should be all>0 or all<0
        sign_check = (exp_v_tilde_1<0);
        if((sum(sign_check)/N_j)>0 & (sum(sign_check)/N_j)<1)
            error('exp_v_tilde has not converged'); 
        end

        exp_v_tilde = exp_v_tilde_1;          %fixed point
        exp_v_tilde=abs(exp_v_tilde);         %because M is pos semi def matrix, taking abs here is innocuous

        %-------------------------------------------------------------------------%
        % Step 3: Grid search on lambda1
        %-------------------------------------------------------------------------%

        %To vectorize calculations, create new variables that exclude "N"
        est_dat_xn = est_dat(2:length(est_dat),:);
        exp_v_tilde_xn = exp_v_tilde(2:length(exp_v_tilde));

        %Condition 1 (unrelated to lambda)
        %{g_i*exp(V_tilde_i)*(1-delta)*(1-rho)}/f_i^o = C1*[exp(V_i) + exp(V_n)] 
        %Note "N" is in the first row of the vector est_dat

        cond1 = (1-delta_sec).*(1-rho_sec).*(est_dat_xn(:,pos_g_edat).*exp_v_tilde_xn)./...
                est_dat_xn(:,pos_f_o_edat);

        lambda_grid = (0.01:0.01:0.99)';
        criterion = [lambda_grid,zeros(length(lambda_grid),2)];

        for l = 1:length(lambda_grid)

        lambda1 = lambda_grid(l);   %set lambda to loop value

            %Condition 2 
            %(1/(1-lambda1))*(1/W)*sum_i(M_in*exp(V_tilde_n)) = C1*exp(V_n)
            cond2 = (1/(1-lambda1))*(1/W)*S(1,1)*exp_v_tilde(1);

            %Condition 3
            %Subtract cond2 from cond1 to get the constant times exp(V_i)
            %C1[exp(V_i)+exp(V_n)] - C1*exp(V_n) = C1*exp(V_i)
            cond3 = cond1 - cond2;

            %Truncate negative values of cond 3 by replacing them with the
            %minimum positive value of cond3
            negcond3 = (cond3<0);
            truncval = min(cond3(logical(1-negcond3)));
            cond3(negcond3) = truncval;

            %Condition 4
            %f_i/C1 = (f_i^o*(C1*exp(V_i)+C1*exp(V_n)))/C1*exp(V_i)
            cond4 = (est_dat_xn(:,pos_f_o_edat).*cond1)./cond3;

            %Get C1
            %Sum(f_i) = 1 so sum(f_i/C1) = 1/C1 
            C1 = 1/(sum(cond4));

            %Get f_i
            f_i = cond4*C1;

            %Get exp(V_n)
            exp_V_n = cond2/C1;

            %Get exp(V_i) 
            exp_V_i = cond3/C1;

            %Probability of endogenous E-E moves implied by the model
            %lambda1 *
            %sum_i{(g_i*(1-delta)*(1-rho)*sum_j{(f_j*exp(V_j)/(exp(V_j)+exp(V_i))}}

            %--------------------------------------------------------------------------
            % Part 1: Reduce the dimensionality of the problem
            % For computational tractibility this step is done by grouping estab's 
            % into 1000 categories of exp(V_i)
            %--------------------------------------------------------------------------
            grid_size = 1000;                                   %set grid size
            obs_per_bin = ceil(length(exp_V_i)/grid_size);      %estab's per grid cell
            bin_val = zeros(length(exp_V_i),1);                 %pre-create vector to store grid index
            exp_V_indx = (1:length(exp_V_i))';                  %index of values vector
            %assign grid cell numbers to each element of exp_V_i
            for j = 1:grid_size
                temp = j*(exp_V_indx<=j*obs_per_bin & exp_V_indx>=(j-1)*obs_per_bin+1);
                bin_val=bin_val+temp;
            end
            %sort in ascending order of exp_V_i. save new sort order in sortindx
            [sort_exp_V_i,sortindx] = sort(exp_V_i); 
            %assign new sort order to g,f,delta,rho vectors by merging on the original index
            sortindx_tab = table(sortindx,exp_V_indx); 
            g = table(est_dat_xn(:,pos_g_edat),exp_V_indx);
            f = table(f_i,exp_V_indx);
            delta_tab = table(delta_sec,exp_V_indx);
            rho_tab = table(rho_sec,exp_V_indx);
            sort_g = join(g,sortindx_tab,'LeftKeys',2,'RightKeys',2);
            sort_g = table2array(sort_g);
            sort_g = sortrows(sort_g,3);
            sort_f = join(f,sortindx_tab,'LeftKeys',2,'RightKeys',2);
            sort_f = table2array(sort_f);
            sort_f = sortrows(sort_f,3);
            sort_delta_tab = join(delta_tab,sortindx_tab,'LeftKeys',2,'RightKeys',2);
            sort_delta_tab = table2array(sort_delta_tab);
            sort_delta_tab = sortrows(sort_delta_tab,3);
            sort_rho_tab = join(rho_tab,sortindx_tab,'LeftKeys',2,'RightKeys',2);
            sort_rho_tab = table2array(sort_rho_tab);
            sort_rho_tab = sortrows(sort_rho_tab,3);
            %Mean establishment values, delta's, and rho's by grid cell
            bin_mean_V = accumarray(bin_val,sort_exp_V_i,[],@mean);
            bin_mean_delta = accumarray(bin_val,sort_delta_tab(:,1),[],@mean);
            bin_mean_rho = accumarray(bin_val,sort_rho_tab(:,1),[],@mean);
            %Sum establishment g's, f's by grid cell
            bin_sum_g = accumarray(bin_val,sort_g(:,1));
            bin_sum_f = accumarray(bin_val,sort_f(:,1));
            %the realized grid size differs slightly from 1000 because of rounding
            real_grid_size = max(bin_val);

            %--------------------------------------------------------------------------
            % Part 2: calculate the leave out sums: 
            % sum_j{(f_j*exp(V_j)/(exp(V_j)+exp(V_i))}
            %--------------------------------------------------------------------------
            sum_leaveout = zeros(real_grid_size,1);
            for j = 1:real_grid_size
                leavein_f = bin_sum_f;
                leavein_f(j)=[];                    %leave out j'th element (f)
                leavein_V = bin_mean_V;
                leavein_V(j)=[];                    %leave out j'th element (exp(V))
                leaveout_V = bin_mean_V(j);         %left out element of exp(V)
                sum_leaveout(j) = sum(leavein_f.*leavein_V./(leavein_V+leaveout_V));
            end

            %--------------------------------------------------------------------------
            % Part 3: calculate lambda1*sum_i{(g_i*(1-delta)*(1-rho).*sum_leaveout}
            % Use this to obtain the overall model EE transition probability 
            %--------------------------------------------------------------------------
            eetrans_model = lambda1*(1-bin_mean_delta).*(1-bin_mean_rho).*...
                            bin_sum_g.*sum_leaveout;
            pr_eetrans_model = sum(eetrans_model);
            criterion(l,2) = pr_eetrans_model;

            %Probability of endogenous E-E moves implied by the data
            pr_eetrans_data = sum(sum(M(2:end,2:end)))/...
                W*sum((1-delta_sec).*(1-rho_sec).*(est_dat_xn(:,pos_g_edat)));

            %Absolute difference in EE transition probability b/w model and data              
            diff_ee = abs(pr_eetrans_model-pr_eetrans_data);
            criterion(l,3) = diff_ee;
       end

        %--------------------------------------------------------------------------
        % Pick the optimal value of lambda1 and solve for 
        % (exp_V_i_star) and f's (f_i_star)
        %--------------------------------------------------------------------------
        criterion = sortrows(criterion,3);
        lambda1_star = criterion(1,1);

        %Recall that condition 1 is unrelated to optimal value of lambda1

        %Condition 2 
        %(1/(1-lambda1))*(1/W)*sum_i(M_in*exp(V_tilde_n)) = C1*exp(V_n)
        cond2_star = (1/(1-lambda1_star))*(1/W)*S(1,1)*exp_v_tilde(1);

        %Condition 3
        %Subtract cond2 from cond1 to get the constant times exp(V_i)
        %C1[exp(V_i)+exp(V_n)] - C1*exp(V_n) = C1*exp(V_i)
        cond3_star = cond1 - cond2_star;

        %Truncate negative values of cond3_star by replacing them with the
        %minimum positive value of cond3_star
        negcond3_star = (cond3_star<0);
        truncval = min(cond3_star(logical(1-negcond3_star)));
        cond3_star(negcond3_star) = truncval;

        %Share of truncated obs
        share_trunc = sum(negcond3_star)/length(cond3_star);
        textshare = ['Fraction truncated obs =',' ', num2str(share_trunc)];
        disp(textshare);

        %Condition 4
        %f_i/C1 = (f_i^o*(C1*exp(V_i)+C1*exp(V_n)))/C1*exp(V_i)
        cond4_star = (est_dat_xn(:,pos_f_o_edat).*cond1)./cond3_star;

        %Get C1
        %Sum(f_i) = 1 so sum(f_i/C1) = 1/C1 
        C1_star = 1/(sum(cond4_star));

        %Get f_i
        f_i_star = cond4_star*C1_star;

        %Get exp(V_n)
        exp_V_n_star = cond2_star/C1_star;

        %Get exp(V_i) 
        exp_V_i_star = cond3_star/C1_star;

        %Update loop values of exp_V_i_0, exp_V_i_1, and 
        exp_V_i_0 = exp_V_i_1;
        exp_V_i_1 = exp_V_i_star;
        swexp_V_i_0 = est_dat_g(2:end).*exp_V_i_0;  %size wtd value 
        swexp_V_i_1 = est_dat_g(2:end).*exp_V_i_1;  %size wtd value

        %Update model-implied endog EE rate
        endog_eerate_new = criterion(1,2);    
        %Update model-implied endog EN rate
        % P(EN_model(i)) =
        % (1-lambda1)*(1-delta_i)*(1-rho_i)*g_i*(exp(V_n)/(exp(V_n)+exp(V_i))
        entrans_model = (1-lambda1_star)*(1-delta_sec).*(1-rho_sec).*est_dat_xn(:,pos_g_edat).*...
            (exp_V_n_star./(exp_V_n_star+exp_V_i_star));
        pr_entrans_model = sum(entrans_model);
        endog_enrate_new = pr_entrans_model;

        %Calculate updated correlation coefficient
        corr_mat_1 = corrcoef(swexp_V_i_0,swexp_V_i_1);
        corr_1 = corr_mat_1(1,2);

        %Update counter
        counter = counter+1;
    end

    %parameters...
    modelprmout = zeros(7,1);

    %lambda_1_star%
    modelprmout(1) = lambda1_star;
    %delta_bar
    modelprmout(2) = delta_bar;
    %rho_bar
    modelprmout(3) = rho_bar;
    %P(EE endog) model
    modelprmout(4) = criterion(1,2);
    %P(EE endog) data
    modelprmout(5) = sum(sum(M(2:end,2:end)))/...
                W*sum((1-delta_sec).*(1-rho_sec).*(est_dat_xn(:,pos_g_edat)));
    %P(EN endog) model
    modelprmout(6) = pr_entrans_model;
    %P(EN endog) data
    modelprmout(7) = sum(sum(M(1,2:end)))/...
                W*sum((1-delta_sec).*(1-rho_sec).*(est_dat_xn(:,pos_g_edat)));

    %labels...
    labels = ["lambda1_star","delta_bar","rho_bar","P(EE endog) model",...
              "P(EE endog) data", "P(EN endog) model", "P(EN endog) data"]';

    %Table
    modelprmtab = table(labels,modelprmout);
    writetable(modelprmtab,fnameout1);

    disp('Writing file to csv...');

    %Output values
    exp_V = full([exp_V_n_star; exp_V_i_star]); 
    exp_V = [est_id,exp_V];
    dlmwrite(fnameout2,exp_V,'precision','%10.9g');
    
end


